# -*- coding: utf-8 -*-
"""
Created on Sun Sep 15 16:49:52 2024
"""

import numpy as np
import matplotlib.pyplot as plt

data_num = 10  # number of runs loaded for each method (files are numbered 1..data_num)

# One entry per run; each entry is a 1-D array of per-epoch losses.
B_train_loss = []
B_val_loss = []
P_train_loss = []
P_val_loss = []

# Baseline runs whose curves are read from .npy files.
baseline_runs_on_disk = (1, 3, 6, 8)

# Baseline curves entered by hand: run number -> (training losses, validation losses).
# Runs 7 and 9 hold 8 values, the others 9.
baseline_typed_in = {
    2: ([6.6042,4.5656,3.8122,3.2801,2.8894,2.5780,2.3142,2.1145,1.9668],
        [5.0399,4.0460,3.4771,2.9864,2.7337,2.5105,2.3435,2.2536,2.1964]),
    4: ([6.6023,4.5662,3.8213,3.2883,2.8948,2.5731,2.2961,2.0982,1.9514],
        [5.0229,4.0374,3.4594,3.0701,2.7383,2.5059,2.3789,2.2345,2.1619]),
    5: ([6.6052,4.5539,3.8124,3.2898,2.8900,2.5672,2.2935,2.1019,1.9542],
        [5.0202,4.0786,3.4213,2.9954,2.7397,2.4904,2.3616,2.2593,2.1739]),
    7: ([6.6122,4.5732,3.8264,3.2967,2.8910,2.5570,2.2852,2.0945],
        [5.0562,4.0448,3.4986,3.0353,2.7275,2.4831,2.3337,2.2295]),
    9: ([6.5844,4.5588,3.8134,3.2622,2.8649,2.5565,2.2939,2.1041],
        [5.0262,4.0284,3.4243,2.9454,2.7562,2.4697,2.3755,2.2483]),
}

# Used for every run number not listed above (run 10 when data_num is 10).
baseline_fallback = (
    [6.6115,4.5674,3.8149,3.2916,2.8904,2.5721,2.3003,2.1062,1.9567],
    [5.0426,4.1129,3.4842,2.9951,2.7738,2.4749,2.3740,2.2375,2.1675],
)

for run in range(1, data_num + 1):
    # Baseline: either read from disk or taken from the hand-entered table.
    if run in baseline_runs_on_disk:
        B_train_loss.append(np.load(f'B{run}_train_loss.npy'))
        B_val_loss.append(np.load(f'B{run}_val_loss.npy'))
    else:
        train_values, val_values = baseline_typed_in.get(run, baseline_fallback)
        B_train_loss.append(np.array(train_values))
        B_val_loss.append(np.array(val_values))

    # Proposed method: every run is read from disk.
    P_train_loss.append(np.load(f'P{run}_train_loss.npy'))
    P_val_loss.append(np.load(f'P{run}_val_loss.npy'))

# # Load loss data from two .npy files (assume each file contains two arrays: training loss and validation loss)
# train_loss_1 = np.load('Base_train_loss.npy')
# val_loss_1 = np.load('Base_val_loss.npy')

# train_loss_2 = np.load('Cat_train_loss.npy')
# val_loss_2 = np.load('Cat_val_loss.npy')

# # Check if the losses have the same length for comparison
# assert len(train_loss_1) == len(val_loss_1), "Training and validation losses for model 1 must have the same length."
# assert len(train_loss_2) == len(val_loss_2), "Training and validation losses for model 2 must have the same length."

# Plot the training and validation loss for both models

plt.figure(figsize=(10, 6))

# One entry per curve family, in drawing (and legend) order:
# (list of per-run loss arrays, legend label, keyword arguments for plt.plot).
# Training curves are dashed with round markers; validation curves use the
# default line style with square markers. Colour separates the two methods.
curve_families = [
    (B_train_loss, 'Training Loss (Baseline)',
     {'c': '#0173b2', 'linestyle': '--', 'marker': 'o', 'alpha': 0.5}),
    (B_val_loss, 'Validation Loss (Baseline)',
     {'c': '#0173b2', 'marker': 's', 'alpha': 0.5}),
    (P_train_loss, 'Training Loss (Proposed method)',
     {'c': '#de8f05', 'linestyle': '--', 'marker': 'o', 'alpha': 0.5}),
    (P_val_loss, 'Validation Loss (Proposed method)',
     {'c': '#de8f05', 'marker': 's', 'alpha': 0.5}),
]

for curves, legend_label, line_kwargs in curve_families:
    for run_index, losses in enumerate(curves):
        # Runs do not all have the same number of recorded epochs, so the
        # x axis is derived from each curve instead of a per-run lookup.
        epochs = range(1, len(losses) + 1)

        plot_kwargs = dict(line_kwargs)
        if run_index == 0:
            # Label only the first run so each family appears once in the legend.
            plot_kwargs['label'] = legend_label
        plt.plot(epochs, losses, **plot_kwargs)

# Labels and title
plt.title('Comparison of Training and Validation Losses', fontsize=30)
plt.xlabel('Epochs', fontsize=25)
plt.ylabel('Loss', fontsize=25)
plt.legend(fontsize=20)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
# Show plot
plt.grid(True)
plt.show()

# # Model 1 Losses
# plt.plot(epochs, train_loss_1, 'r-', label='Training Loss (Baseline)', linestyle='--', marker='o')
# plt.plot(epochs, val_loss_1, 'r-', label='Validation Loss (Baseline)', marker='s')

# # Model 2 Losses
# plt.plot(epochs, train_loss_2, 'b-', label='Training Loss (Concatenation)', linestyle='--', marker='o')
# plt.plot(epochs, val_loss_2, 'b-', label='Validation Loss (Concatenation)', marker='s')

# # Labels and title
# plt.title('Comparison of Training and Validation Losses', fontsize=30)
# plt.xlabel('Epochs', fontsize=25)
# plt.ylabel('Loss', fontsize=25)
# plt.legend(fontsize=20)
# plt.xticks(fontsize=20)
# plt.yticks(fontsize=20)
# # Show plot
# plt.grid(True)
# plt.show()

import pandas as pd

#%% B_train_loss

# Number of recorded values in each baseline training curve
run_lengths = [len(curve) for curve in B_train_loss]
print(run_lengths)

# Every curve is extended to the length of the longest one
longest = max(run_lengths)

# Right-pad the shorter curves with NaN so all rows share one width
nan_padded = []
for curve in B_train_loss:
    missing = longest - len(curve)
    nan_padded.append(np.pad(curve, (0, missing), constant_values=np.nan))

# One row per run, one column per position in the curve
B_train_loss_df = pd.DataFrame(nan_padded)

print(B_train_loss_df)

# Column-wise summary across runs; each statistic becomes one column of the result
B_train_loss_stats = pd.DataFrame({
    stat_name: getattr(B_train_loss_df, stat_name)()
    for stat_name in ('mean', 'std', 'min', 'max', 'median')
})

print(B_train_loss_stats)

#%% P_train_loss

# Number of recorded values in each proposed-method training curve
run_lengths = [len(curve) for curve in P_train_loss]
print(run_lengths)

# Every curve is extended to the length of the longest one
longest = max(run_lengths)

# Right-pad the shorter curves with NaN so all rows share one width
nan_padded = []
for curve in P_train_loss:
    missing = longest - len(curve)
    nan_padded.append(np.pad(curve, (0, missing), constant_values=np.nan))

# One row per run, one column per position in the curve
P_train_loss_df = pd.DataFrame(nan_padded)

print(P_train_loss_df)

# Column-wise summary across runs; each statistic becomes one column of the result
P_train_loss_stats = pd.DataFrame({
    stat_name: getattr(P_train_loss_df, stat_name)()
    for stat_name in ('mean', 'std', 'min', 'max', 'median')
})

print(P_train_loss_stats)

#%% B_val_loss

# Number of recorded values in each baseline validation curve
print([len(arr) for arr in B_val_loss])

# Find the length of the longest validation curve. The width must come from
# B_val_loss itself: taking it from another list would pad to the wrong width,
# and np.pad raises on a negative pad width if a validation curve is longer.
max_len = max(len(arr) for arr in B_val_loss)

# Pad each array with NaN to match the maximum length
padded_data = [np.pad(arr, (0, max_len - len(arr)), constant_values=np.nan) for arr in B_val_loss]

# Create a DataFrame from the padded arrays (one row per run)
B_val_loss_df = pd.DataFrame(padded_data)

print(B_val_loss_df)

# Calculate statistics for each column, i.e. across runs at each position
B_val_loss_stats = pd.DataFrame({
    'mean': B_val_loss_df.mean(),
    'std': B_val_loss_df.std(),
    'min': B_val_loss_df.min(),
    'max': B_val_loss_df.max(),
    'median': B_val_loss_df.median()
})

print(B_val_loss_stats)

#%% P_val_loss

# Number of recorded values in each proposed-method validation curve
run_lengths = [len(curve) for curve in P_val_loss]
print(run_lengths)

# Every curve is extended to the length of the longest one
longest = max(run_lengths)

# Right-pad the shorter curves with NaN so all rows share one width
nan_padded = []
for curve in P_val_loss:
    missing = longest - len(curve)
    nan_padded.append(np.pad(curve, (0, missing), constant_values=np.nan))

# One row per run, one column per position in the curve
P_val_loss_df = pd.DataFrame(nan_padded)

print(P_val_loss_df)

# Column-wise summary across runs; each statistic becomes one column of the result
P_val_loss_stats = pd.DataFrame({
    stat_name: getattr(P_val_loss_df, stat_name)()
    for stat_name in ('mean', 'std', 'min', 'max', 'median')
})

print(P_val_loss_stats)